# Make the directory reported by here() the working directory, so that the
# relative "data/...", "figures/..." and "tables/..." paths used throughout
# this script resolve from there.
library(here)
setwd(here())
# Restore the objects stored in pf_results.rda. Presumably this is where `pf`
# (used in the issue comparison below, never created in this file) comes
# from -- TODO confirm.
load(file = "data/intermediate/pf_results.rda")

library(dplyr)
library(mellonMisc)
library(readxl)

#### CMP comparison ####

library(haven)
library(manifestoR)
# The Manifesto Project API needs a key: request one at
# https://manifesto-project.wzb.eu/ and store it in the file read below.
mp_setapikey("data/raw/manifesto_apikey.txt")
cmp <- mp_maindataset()

# Column names that start with "per" followed by a digit.
vars <- grep("^per[[:digit:]]", colnames(cmp), value = TRUE)
# For every such name containing "_", take the part before the first "_";
# names equal to one of those stems are then dropped from `vars`.
exclude.vars <- sapply(strsplit(vars[grepl("_", vars)], "_"), `[`, 1)
vars <- vars[!(vars %in% exclude.vars)]

# Keep only the rows for country code 51, election date 2017-06-08 and the
# party abbreviated "Conservatives".
cmp <- cmp %>%
  filter(
    country == 51,
    edate == "2017-06-08",
    partyabbrev == "Conservatives"
  )

# First sheet of the lookup workbook; it supplies the `code` column used here
# and the `bes_reconciled` column used further down.
cmpmii <- read_excel("data/raw/cmpToBESMII.xlsx", 1)

# Hold the codes as text with every "." turned into "_", so that
# "per" + code can be used as a column name of `cmp` below.
cmpmii$code <- gsub(".", "_", as.character(cmpmii$code), fixed = TRUE)

# Drop the rows whose code is 0.
cmpmii <- cmpmii[cmpmii$code != 0, ]

# Pull the matching per-columns out of `cmp` and transpose them, then copy
# each value onto the lookup row that carries the same code.
per.cols <- paste0("per", cmpmii$code)
cmp <- t(cmp[, per.cols])
code.pos <- match(cmpmii$code, gsub("per", "", rownames(cmp)))
cmpmii$percent <- cmp[code.pos]

# Coded 2017 manifesto spreadsheet (the comments near the end of this script
# refer to its rows as quasi-sentences).
con.manifesto <- read_excel("data/raw/2017_con_manifesto.xlsx")
con.manifesto$eu_possible <- as.numeric(con.manifesto$eu_possible)
prop.table(table(con.manifesto$eu_possible))

# Treat the "H" code as missing.
h.rows <- which(con.manifesto$cmp_code == "H")
con.manifesto$cmp_code[h.rows] <- NA

# TRUE for rows whose CMP code is 108.0 or 110.0 (described below as the two
# European integration categories).
eu.codes <- c("108.0", "110.0")
con.manifesto$eu.cmp <- con.manifesto$cmp_code %in% eu.codes


#### 1. Issue comparison ####

# Share of each issue category (pf$agreed_cat_string) under three weightings:
# summed pf$Weight, summed pf$Weight.c, and a plain count of rows.
iss.areas <- dtf(
  Conjoint = prop.table(tapply(pf$Weight, pf$agreed_cat_string, sum)),
  Conjoint.con = prop.table(tapply(pf$Weight.c, pf$agreed_cat_string, sum)),
  Count = prop.table(table(pf$agreed_cat_string))
)

# The table argument contributes two columns, Count.Var1 (category labels)
# and Count.Freq (shares); expose them as Issue and Equal.
iss.areas$Issue <- iss.areas$Count.Var1
iss.areas$Count.Var1 <- NULL
names(iss.areas)[names(iss.areas) == "Count.Freq"] <- "Equal"

# Untouched copy holding every category, reused for the BES comparison.
iss.full <- iss.areas

# Categories whose three shares add up to less than 0.03, together with
# "other", are pooled into a single "Other" row.
share.cols <- 1:3
small.issues <- c(
  names(which(sort(rowSums(iss.areas[, share.cols])) < 0.03)),
  "other"
)
is.small <- iss.areas$Issue %in% small.issues
oth.tots <- colSums(iss.areas[is.small, share.cols])
oth.tots <- dtf(matrix(oth.tots, nrow = 1), "Other")
colnames(oth.tots) <- c("Conjoint", "Conjoint.con", "Equal", "Issue")

iss.areas <- rbind(iss.areas[!is.small, ], oth.tots)

# Rank the kept categories (every row except the appended "Other" row) by
# combined share, largest first, and put "Other" at the end. The factor
# levels are stored in the reverse of that order.
kept.shares <- rowSums(iss.areas[-nrow(iss.areas), share.cols])
iss.order <- c(names(sort(kept.shares, decreasing = TRUE)), "Other")
iss.areas$Issue <- factor(iss.areas$Issue, levels = rev(iss.order))

# Long format: one row per Issue x weighting column.
library(reshape2)
iss.areas <- melt(iss.areas)
iss.areas$Weighting <- iss.areas$variable
levels(iss.areas$Issue) <- Hmisc::capitalize(levels(iss.areas$Issue))

# Media results (first sheet) plus two lookup sheets from the CMP/BES
# workbook: "bes_lboro" and "lboro_combined".
lb.media <- read_excel("data/raw/loughborough_media_results.xlsx", 1)
lb.lookup <- read_excel("data/raw/cmpToBESMII.xlsx", "bes_lboro")
lb.comb.lookup <- read_excel("data/raw/cmpToBESMII.xlsx", "lboro_combined")

# Give every CMP lookup row the `lborough` value of the bes_lboro row whose
# MII_cat1 equals its bes_reconciled entry (NA where nothing matches).
lboro.pos <- match(cmpmii$bes_reconciled, lb.lookup$MII_cat1)
cmpmii$lboro <- lb.lookup$lborough[lboro.pos]

library(scales)
library(ggplot2)
# Horizontal dodged bars: within each issue area, one bar per weighting.
iss.area.plot <- ggplot(
  iss.areas,
  aes(x = Issue, y = value, group = Weighting, fill = Weighting)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6) +
  scale_y_continuous(labels = percent, expand = c(0, 0)) +
  labs(x = "Issue area", y = "Percentage of manifesto") +
  coord_flip() +
  theme_bes() +
  scale_fill_brewer(palette = "Blues")


bes.mii <- read_stata("data/raw/besmiiw12w13.dta")
# Drop rows where both wt_new_W12 and wt_new_W13 are missing.
bes.mii <- bes.mii %>%
  filter(!(is.na(wt_new_W12) & is.na(wt_new_W13)))

# Text version of the labelled mii_catW13 variable; the categories listed
# here are set to missing.
bes.mii$miiW13 <- as.character(as_factor(bes.mii$mii_catW13))
dropped.cats <- c("Referendum unspecified", "uncoded", "election outcome",
                  "economy-personal", "partisan-neg", "pol-neg")
bes.mii$miiW13[bes.mii$miiW13 %in% dropped.cats] <- NA

# Weighted (wt_new_W13) distribution of miiW13.
bes.d <- makeDesign(data = dtf(bes.mii), id.var = "id",
                    weight.var = "wt_new_W13")
library(survey)
issues <- prop.table(svytable(design = bes.d, ~miiW13))

# Look up the BES share of every issue by name.
iss.full$BES <- issues[match(iss.full$Issue, names(issues))]

# Whatever share the matched issues leave unaccounted for is added to the
# "other" row.
is.other <- iss.full$Issue == "other"
iss.full$BES[is.other] <- iss.full$BES[is.other] + (1 - sum(iss.full$BES))

iss.full$BES <- as.numeric(iss.full$BES)
iss.full$Conjoint <- as.numeric(iss.full$Conjoint)
# An issue is "important" when it exceeds 5% on Conjoint, Equal or BES.
iss.full$important <- with(
  iss.full,
  Conjoint > 0.05 | Equal > 0.05 | BES > 0.05
)

levels(iss.full$Issue) <- Hmisc::capitalize(levels(iss.full$Issue))
library(ggrepel)

# Build one labelled scatter of a manifesto share (y) against the BES MII
# share (x) for the issues flagged as important, with both axes limited to
# 0-50% and a 45-degree reference line.
#   mapping: an aes() giving x, y and label.
#   title:   panel title.
# Returns the ggplot object.
mii.scatter <- function(mapping, title) {
  ggplot(iss.full[iss.full$important, ], mapping) +
    geom_point() +
    geom_text_repel() +
    scale_y_continuous(labels = percent, limits = c(0, 0.5)) +
    scale_x_continuous(labels = percent, limits = c(0, 0.5)) +
    theme_bes() +
    geom_abline(intercept = 0, slope = 1) +
    ggtitle(title) +
    xlab("% MII for 2017 BES respondents") +
    ylab("% of Conservative manifesto")
}

iss.conjoint.bes <- mii.scatter(
  aes(x = BES, y = Conjoint, label = Issue),
  "Conjoint weighting"
)

# This panel used to carry the same "Conjoint weighting" title as the panel
# above, which made the two indistinguishable in the combined figure.
# NOTE(review): the wording follows the comment near the end of this script
# that describes Conjoint.con as measured with 2017 Conservative voters --
# confirm the preferred label.
iss.conjoint.con.bes <- mii.scatter(
  aes(x = BES, y = Conjoint.con, label = Issue),
  "Conjoint weighting (Conservative voters)"
)

iss.equal.bes <- mii.scatter(
  aes(x = BES, y = Equal, label = Issue),
  "Equal weighting"
)

# Three panels side by side: equal, conjoint, conjoint (Conservative voters).
library(gridExtra)
conjoint.equal.mii <- arrangeGrob(iss.equal.bes, iss.conjoint.bes,
                                  iss.conjoint.con.bes, ncol = 3)
plot(conjoint.equal.mii)
saveForPub(conjoint.equal.mii, file = "figures/conjoint.equal.mii",
           width = 15, height = 5)

# Attach the `lborough` category to each issue. The lookup is keyed on the
# row names of iss.full, not on iss.full$Issue, whose levels are passed
# through Hmisc::capitalize earlier in this script.
iss.full$lboro <- lb.lookup$lborough[match(rownames(iss.full), lb.lookup$MII_cat1)]

# Sum the four share columns within each category, then add the media share
# and the summed CMP percentage for the same category.
lboro.comp <- aggregate(iss.full[, c("Conjoint", "Conjoint.con", "Equal", "BES")],
                        list(Issue = iss.full$lboro), sum)
lboro.comp$Media <- lb.media$`media reweighted`[match(lboro.comp$Issue, lb.media$Issue)]

# as.character() guarantees the tapply result is indexed by name, never by
# integer position.
lboro.comp$CMP <- tapply(cmpmii$percent, cmpmii$lboro, sum)[as.character(lboro.comp$Issue)]
# Categories with no CMP entry count as 0.
lboro.comp$CMP[is.na(lboro.comp$CMP)] <- 0

# Second version, aggregated further to the `combined` categories of the
# lboro_combined sheet.
lboro.comp2 <- lboro.comp
lboro.comp2$Issue2 <- lb.comb.lookup$combined[match(lboro.comp2$Issue, lb.comb.lookup$lboro)]
lboro.comp2$Issue <- NULL

lboro.comp2 <- aggregate(lboro.comp2[, colnames(lboro.comp2) != "Issue2"],
                         list(lboro.comp2$Issue2), sum)
lboro.comp2$CMP <- lboro.comp2$CMP / 100
# Column 1 is the grouping label; correlate the remaining numeric columns.
cor.mat.lboro2 <- round(cor(lboro.comp2[-1]), 2)

# Computed once here; the original repeated this identical assignment a few
# lines further down.
cor.mat.lboro <- round(cor(lboro.comp[-1]), 2)
cor.mat.lboro

cor(iss.full[, c("Conjoint", "Equal", "BES")])
# Same correlations without the Europe issue. The comparison ignores case
# because the Issue levels are passed through Hmisc::capitalize earlier in
# this script; when that relabelling takes effect, a test against lower-case
# "europe" matches nothing and the row is silently kept.
not.europe <- tolower(as.character(iss.full$Issue)) != "europe"
cor(iss.full[not.europe, c("Conjoint", "Equal", "BES")])

saveForPub(iss.area.plot, file = "figures/iss.area.plot", width = 8, height = 5)

# Presentation variant of the issue-area plot. The original first tried to
# save an object called iss.area.plot.pres that is never created in this
# file, and then wrote the expression below to the same file name; the
# object is now built explicitly and saved once.
# NOTE(review): the fill aesthetic (Weighting) appears to have three levels
# (Conjoint, Conjoint.con, Equal) but only two manual colours are supplied;
# ggplot2 needs at least one value per level -- confirm, and add a third
# colour or restrict the data to two weightings.
iss.area.plot.pres <- iss.area.plot +
  scale_fill_manual(values = c("#f8e71c", "#01afd1")) +
  xlab("")
saveForPub(iss.area.plot.pres, file = "figures/iss.area.plot.pres",
           width = 8, height = 5)





#### Output ####

# Table 2 Correlation matrix (12 issue areas) of conjoint-weighted promises,
# equally-weighted promises, BESIP MII and media coverage.
# Keep the four measures reported in the table, relabel rows and columns
# with the names used in the paper, and write the result to disk.
subset <- c("Conjoint", "Equal", "BES", "Media")
renamesubs <- c("Promises: centrality", "Promises: equal", "MII", "Media")

cor.mat.lboro <- cor.mat.lboro[subset, subset]
dimnames(cor.mat.lboro) <- list(renamesubs, renamesubs)
write.csv(cor.mat.lboro, file = "tables/cor.mat.lboro.csv")


# Footnote and Appendix H checks. Each statement quoted from the text is
# followed by the expression that prints the figure behind it.

# Footnote: "Only 2% of quasi-sentences in the manifesto are coded into
# either EU category." Appendix H words the same figure as "corresponds to
# 2% of the total manifesto content."
prop.table(table(con.manifesto$eu.cmp))
# "Even with a maximal definition of what counts as an EU-related statement,
# only 7% of quasi-sentences relate to the EU."
prop.table(table(con.manifesto$eu_possible))


# Appendix H: "Using the CMP coding of issues, 35 quasi-sentences are
# statements that fall into the two European integration categories (108 and
# 110)."
table(con.manifesto$eu.cmp)
# "We also recoded the 1612 ..." (sentence left unfinished in the original
# comment); the figure printed here is the number of rows in con.manifesto.
nrow(con.manifesto)
# "When we took this more expansive coding, we found 105 quasi-sentences that
# were at least somewhat related to Europe:"
table(con.manifesto$eu_possible)
# "7% of all quasi-sentences."
prop.table(table(con.manifesto$eu_possible))

# "While this is higher than the 2% that CMP originally coded"
prop.table(table(con.manifesto$eu.cmp))

# Table 3 Correlation matrix between the 9 issue areas tracked by the
# Loughborough media tracking project across the conjoint weighted
# Conservative manifesto, the unweighted Conservative manifesto, most
# important issue (MII) responses of BESIP respondents in post-election wave,
# Media coverage and CMP issue coding.
# Written out in full: every column of cor.mat.lboro2 is kept.

write.csv(cor.mat.lboro2, file = "tables/cor.mat.lboro2.csv")

# In-text claim: "The CMP measure is strongly correlated with the equal issue
# weighting of promises (r=0.81)". The line below prints that coefficient.
cor.mat.lboro2["Equal", "CMP"]

# In-text claim: "but is actually negatively correlated with the issue
# centrality from the conjoint experiment whether measured with the whole
# population or 2017 Conservative voters." The two lines below print the
# Conjoint and Conjoint.con coefficients against CMP respectively.
cor.mat.lboro2["Conjoint", "CMP"]
cor.mat.lboro2["Conjoint.con", "CMP"]
